#ifdef __amd64__

#include <stdlib.h>
#include "ibm.h"
#include "cpu.h"
#include "x86.h"
#include "x86_flags.h"
#include "x86_ops.h"
#include "x87.h"
#include "mem.h"

#include "386_common.h"

#include "codegen.h"
#include "codegen_ops.h"
#include "codegen_ops_x86-64.h"

#if defined(__linux__) || defined(__APPLE__)
#include <sys/mman.h>
#include <unistd.h>
#endif
#if WIN64
#include <windows.h>
#endif

int codegen_flat_ds, codegen_flat_ss;
int codegen_flags_changed = 0;
int codegen_fpu_entered = 0;
int codegen_fpu_loaded_iq[8];
int codegen_reg_loaded[8];
x86seg *op_ea_seg;
int op_ssegs;
uint32_t op_old_pc;

uint32_t recomp_page = -1;

int host_reg_mapping[NR_HOST_REGS];
int host_reg_xmm_mapping[NR_HOST_XMM_REGS];
codeblock_t *codeblock;
codeblock_t **codeblock_hash;
int codegen_mmx_entered = 0;

int block_current = 0;
static int block_num;
int block_pos;

int cpu_recomp_flushes, cpu_recomp_flushes_latched;
int cpu_recomp_evicted, cpu_recomp_evicted_latched;
int cpu_recomp_reuse, cpu_recomp_reuse_latched;
int cpu_recomp_removed, cpu_recomp_removed_latched;

uint32_t codegen_endpc;

int codegen_block_cycles;
static int codegen_block_ins;
static int codegen_block_full_ins;

static uint32_t last_op32;
static x86seg *last_ea_seg;
static int last_ssegs;

void codegen_init() {
        int c;

#if defined(__linux__) || defined(__APPLE__)
        void *start;
        size_t len;
        long pagesize = sysconf(_SC_PAGESIZE);
        long pagemask = ~(pagesize - 1);
#endif

#if WIN64
        codeblock = VirtualAlloc(NULL, BLOCK_SIZE * sizeof(codeblock_t), MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
        codeblock = malloc(BLOCK_SIZE * sizeof(codeblock_t));
#endif
        codeblock_hash = malloc(HASH_SIZE * sizeof(codeblock_t *));

        memset(codeblock, 0, BLOCK_SIZE * sizeof(codeblock_t));
        memset(codeblock_hash, 0, HASH_SIZE * sizeof(codeblock_t *));

        for (c = 0; c < BLOCK_SIZE; c++)
                codeblock[c].pc = BLOCK_PC_INVALID;

#if defined(__linux__) || defined(__APPLE__)
        start = (void *)((long)codeblock & pagemask);
        len = ((BLOCK_SIZE * sizeof(codeblock_t)) + pagesize) & pagemask;
        if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
                perror("mprotect");
                exit(-1);
        }
#endif
        //        pclog("Codegen is %p\n", (void *)pages[0xfab12 >> 12].block);
}

void codegen_reset() {
        int c;

        memset(codeblock, 0, BLOCK_SIZE * sizeof(codeblock_t));
        memset(codeblock_hash, 0, HASH_SIZE * sizeof(codeblock_t *));
        mem_reset_page_blocks();

        for (c = 0; c < BLOCK_SIZE; c++)
                codeblock[c].pc = BLOCK_PC_INVALID;
}

void dump_block() {
        /*        codeblock_t *block = pages[0x119000 >> 12].block;

                pclog("dump_block:\n");
                while (block)
                {
                        uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff);
                        uint32_t end_pc = (block->endpc & 0xffc) | (block->phys & ~0xfff);
                        pclog(" %p : %08x-%08x  %08x-%08x %p %p\n", (void *)block, start_pc, end_pc,  block->pc, block->endpc,
           (void *)block->prev, (void *)block->next); if (!block->pc) fatal("Dead PC=0\n");

                        block = block->next;
                }
                pclog("dump_block done\n");*/
}

static void add_to_block_list(codeblock_t *block) {
        codeblock_t *block_prev = pages[block->phys >> 12].block[(block->phys >> 10) & 3];

        if (!block->page_mask)
                fatal("add_to_block_list - mask = 0\n");

        if (block_prev) {
                block->next = block_prev;
                block_prev->prev = block;
                pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block;
        } else {
                block->next = NULL;
                pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block;
        }

        if (block->next) {
                if (block->next->pc == BLOCK_PC_INVALID)
                        fatal("block->next->pc=BLOCK_PC_INVALID %p %p %x %x\n", (void *)block->next, (void *)codeblock,
                              block_current, block_pos);
        }

        if (block->page_mask2) {
                block_prev = pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3];

                if (block_prev) {
                        block->next_2 = block_prev;
                        block_prev->prev_2 = block;
                        pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block;
                } else {
                        block->next_2 = NULL;
                        pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block;
                }
        }
}

static void remove_from_block_list(codeblock_t *block, uint32_t pc) {
        if (!block->page_mask)
                return;

        if (block->prev) {
                block->prev->next = block->next;
                if (block->next)
                        block->next->prev = block->prev;
        } else {
                pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block->next;
                if (block->next)
                        block->next->prev = NULL;
                else
                        mem_flush_write_page(block->phys, 0);
        }
        if (!block->page_mask2) {
                if (block->prev_2 || block->next_2)
                        fatal("Invalid block_2\n");
                return;
        }

        if (block->prev_2) {
                block->prev_2->next_2 = block->next_2;
                if (block->next_2)
                        block->next_2->prev_2 = block->prev_2;
        } else {
                //                pclog(" pages.block_2=%p 3 %p %p\n", (void *)block->next_2, (void *)block, (void
                //                *)pages[block->phys_2 >> 12].block_2);
                pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block->next_2;
                if (block->next_2)
                        block->next_2->prev_2 = NULL;
                else
                        mem_flush_write_page(block->phys_2, 0);
        }
}

static void delete_block(codeblock_t *block) {
        uint32_t old_pc = block->pc;

        if (block == codeblock_hash[HASH(block->phys)])
                codeblock_hash[HASH(block->phys)] = NULL;

        if (block->pc == BLOCK_PC_INVALID)
                fatal("Deleting deleted block\n");
        block->pc = BLOCK_PC_INVALID;

        codeblock_tree_delete(block);
        remove_from_block_list(block, old_pc);
}

void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) {
        struct codeblock_t *block = page->block[(phys_addr >> 10) & 3];

        while (block) {
                if (mask & block->page_mask) {
                        delete_block(block);
                        cpu_recomp_evicted++;
                }
                if (block == block->next)
                        fatal("Broken 1\n");
                block = block->next;
        }

        block = page->block_2[(phys_addr >> 10) & 3];

        while (block) {
                if (mask & block->page_mask2) {
                        delete_block(block);
                        cpu_recomp_evicted++;
                }
                if (block == block->next_2)
                        fatal("Broken 2\n");
                block = block->next_2;
        }
}

void codegen_block_init(uint32_t phys_addr) {
        codeblock_t *block;
        page_t *page = &pages[phys_addr >> 12];

        if (!page->block[(phys_addr >> 10) & 3])
                mem_flush_write_page(phys_addr, cs + cpu_state.pc);

        block_current = (block_current + 1) & BLOCK_MASK;
        block = &codeblock[block_current];

        //        if (block->pc == 0xb00b4ff5)
        //                pclog("Init target block\n");
        if (block->pc != BLOCK_PC_INVALID) {
                //                pclog("Reuse block : was %08x now %08x\n", block->pc, cs+pc);
                delete_block(block);
                cpu_recomp_reuse++;
        }
        block_num = HASH(phys_addr);
        codeblock_hash[block_num] = &codeblock[block_current];

        block->ins = 0;
        block->pc = cs + cpu_state.pc;
        block->_cs = cs;
        block->pnt = block_current;
        block->phys = phys_addr;
        block->dirty_mask = &page->dirty_mask[(phys_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK];
        block->dirty_mask2 = NULL;
        block->next = block->prev = NULL;
        block->next_2 = block->prev_2 = NULL;
        block->page_mask = 0;
        block->flags = 0;
        block->status = cpu_cur_status;

        block->was_recompiled = 0;

        recomp_page = block->phys & ~0xfff;

        codeblock_tree_add(block);
}

void codegen_block_start_recompile(codeblock_t *block) {
        page_t *page = &pages[block->phys >> 12];

        if (!page->block[(block->phys >> 10) & 3])
                mem_flush_write_page(block->phys, cs + cpu_state.pc);

        block_num = HASH(block->phys);
        block_current = block->pnt;

        if (block->pc != cs + cpu_state.pc || block->was_recompiled)
                fatal("Recompile to used block!\n");

        block->status = cpu_cur_status;

        block_pos = BLOCK_GPF_OFFSET;
#if WIN64
        addbyte(0x48); /*XOR RCX, RCX*/
        addbyte(0x31);
        addbyte(0xc9);
        addbyte(0x31); /*XOR EDX, EDX*/
        addbyte(0xd2);
#else
        addbyte(0x48); /*XOR RDI, RDI*/
        addbyte(0x31);
        addbyte(0xff);
        addbyte(0x31); /*XOR ESI, ESI*/
        addbyte(0xf6);
#endif
        call(block, (uintptr_t)x86gpf);
        while (block_pos < BLOCK_EXIT_OFFSET)
                addbyte(0x90);         /*NOP*/
        block_pos = BLOCK_EXIT_OFFSET; /*Exit code*/
        addbyte(0x48);                 /*ADDL $40,%rsp*/
        addbyte(0x83);
        addbyte(0xC4);
        addbyte(0x28);
        addbyte(0x41); /*POP R15*/
        addbyte(0x5f);
        addbyte(0x41); /*POP R14*/
        addbyte(0x5e);
        addbyte(0x41); /*POP R13*/
        addbyte(0x5d);
        addbyte(0x41); /*POP R12*/
        addbyte(0x5c);
        addbyte(0x5f); /*POP RDI*/
        addbyte(0x5e); /*POP RSI*/
        addbyte(0x5d); /*POP RBP*/
        addbyte(0x5b); /*POP RDX*/
        addbyte(0xC3); /*RET*/
        cpu_block_end = 0;
        block_pos = 0; /*Entry code*/
        addbyte(0x53); /*PUSH RBX*/
        addbyte(0x55); /*PUSH RBP*/
        addbyte(0x56); /*PUSH RSI*/
        addbyte(0x57); /*PUSH RDI*/
        addbyte(0x41); /*PUSH R12*/
        addbyte(0x54);
        addbyte(0x41); /*PUSH R13*/
        addbyte(0x55);
        addbyte(0x41); /*PUSH R14*/
        addbyte(0x56);
        addbyte(0x41); /*PUSH R15*/
        addbyte(0x57);
        addbyte(0x48); /*SUBL $40,%rsp*/
        addbyte(0x83);
        addbyte(0xEC);
        addbyte(0x28);
        addbyte(0x48); /*MOVL RBP, &cpu_state*/
        addbyte(0xBD);
        addquad(((uintptr_t)&cpu_state) + 128);

        //        pclog("New block %i for %08X   %03x\n", block_current, cs+pc, block_num);

        last_op32 = -1;
        last_ea_seg = NULL;
        last_ssegs = -1;

        codegen_block_cycles = 0;
        codegen_timing_block_start();

        codegen_block_ins = 0;
        codegen_block_full_ins = 0;

        recomp_page = block->phys & ~0xfff;

        codegen_flags_changed = 0;
        codegen_fpu_entered = 0;
        codegen_mmx_entered = 0;

        codegen_fpu_loaded_iq[0] = codegen_fpu_loaded_iq[1] = codegen_fpu_loaded_iq[2] = codegen_fpu_loaded_iq[3] =
                codegen_fpu_loaded_iq[4] = codegen_fpu_loaded_iq[5] = codegen_fpu_loaded_iq[6] = codegen_fpu_loaded_iq[7] = 0;

        _ds.checked = _es.checked = _fs.checked = _gs.checked = (cr0 & 1) ? 0 : 1;

        codegen_reg_loaded[0] = codegen_reg_loaded[1] = codegen_reg_loaded[2] = codegen_reg_loaded[3] = codegen_reg_loaded[4] =
                codegen_reg_loaded[5] = codegen_reg_loaded[6] = codegen_reg_loaded[7] = 0;

        block->was_recompiled = 1;

        codegen_flat_ds = !(cpu_cur_status & CPU_STATUS_NOTFLATDS);
        codegen_flat_ss = !(cpu_cur_status & CPU_STATUS_NOTFLATSS);
}

void codegen_block_remove() {
        codeblock_t *block = &codeblock[block_current];

        delete_block(block);
        cpu_recomp_removed++;

        recomp_page = -1;
}

void codegen_block_generate_end_mask() {
        codeblock_t *block = &codeblock[block_current];
        uint32_t start_pc;
        uint32_t end_pc;

        block->endpc = codegen_endpc;

        block->page_mask = 0;
        start_pc = (block->pc & 0x3ff) & ~15;
        if ((block->pc ^ block->endpc) & ~0x3ff)
                end_pc = 0x3ff & ~15;
        else
                end_pc = (block->endpc & 0x3ff) & ~15;
        if (end_pc < start_pc)
                end_pc = 0x3ff;
        start_pc >>= PAGE_MASK_SHIFT;
        end_pc >>= PAGE_MASK_SHIFT;

        //        pclog("block_end: %08x %08x\n", start_pc, end_pc);
        for (; start_pc <= end_pc; start_pc++) {
                block->page_mask |= ((uint64_t)1 << start_pc);
                //                pclog("  %08x %llx\n", start_pc, block->page_mask);
        }

        pages[block->phys >> 12].code_present_mask[(block->phys >> 10) & 3] |= block->page_mask;

        block->phys_2 = -1;
        block->page_mask2 = 0;
        block->next_2 = block->prev_2 = NULL;
        if ((block->pc ^ block->endpc) & ~0x3ff) {
                block->phys_2 = get_phys_noabrt(block->endpc);
                if (block->phys_2 != -1) {
                        page_t *page_2 = &pages[block->phys_2 >> 12];

                        start_pc = 0;
                        end_pc = (block->endpc & 0x3ff) >> PAGE_MASK_SHIFT;
                        for (; start_pc <= end_pc; start_pc++)
                                block->page_mask2 |= ((uint64_t)1 << start_pc);
                        page_2->code_present_mask[(block->phys_2 >> 10) & 3] |= block->page_mask2;

                        if (!pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3])
                                mem_flush_write_page(block->phys_2, block->endpc);

                        if (!block->page_mask2)
                                fatal("!page_mask2\n");
                        if (block->next_2) {
                                //                        pclog("  next_2->pc=%08x\n", block->next_2->pc);
                                if (block->next_2->pc == BLOCK_PC_INVALID)
                                        fatal("block->next_2->pc=BLOCK_PC_INVALID %p\n", (void *)block->next_2);
                        }

                        block->dirty_mask2 = &page_2->dirty_mask[(block->phys_2 >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK];
                }
        }

        //        pclog("block_end: %08x %08x %016llx\n", block->pc, block->endpc, block->page_mask);
        recomp_page = -1;
}

void codegen_block_end() {
        codeblock_t *block = &codeblock[block_current];

        codegen_block_generate_end_mask();
        add_to_block_list(block);
}

void codegen_block_end_recompile(codeblock_t *block) {
        codegen_timing_block_end();

        if (codegen_block_cycles) {
                addbyte(0x81); /*SUB $codegen_block_cycles, cyclcs*/
                addbyte(0x6d);
                addbyte((uint8_t)cpu_state_offset(_cycles));
                addlong((uint32_t)codegen_block_cycles);
        }
        if (codegen_block_ins) {
                addbyte(0x81); /*ADD $codegen_block_ins,ins*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(cpu_recomp_ins));
                addlong(codegen_block_ins);
        }
#if 0
	if (codegen_block_full_ins)
	{
		addbyte(0x81); /*ADD $codegen_block_ins,ins*/
		addbyte(0x04);
		addbyte(0x25);
		addlong((uint32_t)&cpu_recomp_full_ins);
		addlong(codegen_block_full_ins);
	}
#endif
        addbyte(0x48); /*ADDL $40,%rsp*/
        addbyte(0x83);
        addbyte(0xC4);
        addbyte(0x28);
        addbyte(0x41); /*POP R15*/
        addbyte(0x5f);
        addbyte(0x41); /*POP R14*/
        addbyte(0x5e);
        addbyte(0x41); /*POP R13*/
        addbyte(0x5d);
        addbyte(0x41); /*POP R12*/
        addbyte(0x5c);
        addbyte(0x5f); /*POP RDI*/
        addbyte(0x5e); /*POP RSI*/
        addbyte(0x5d); /*POP RBP*/
        addbyte(0x5b); /*POP RDX*/
        addbyte(0xC3); /*RET*/

        if (block_pos > BLOCK_GPF_OFFSET)
                fatal("Over limit!\n");

        remove_from_block_list(block, block->pc);
        block->next = block->prev = NULL;
        block->next_2 = block->prev_2 = NULL;
        codegen_block_generate_end_mask();
        add_to_block_list(block);
        //        pclog("End block %i\n", block_num);
}

void codegen_flush() { return; }

static int opcode_modrm[256] = {
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*00*/
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*10*/
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*20*/
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*30*/

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50*/
        0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, /*60*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70*/

        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*80*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*90*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*a0*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*b0*/

        1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /*d0*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*e0*/
        0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, /*f0*/
};
int opcode_0f_modrm[256] = {
        1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*00*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*10*/
        1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30*/

        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*40*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50*/
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, /*60*/
        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, /*70*/

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*80*/
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*90*/
        0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /*a0*/
        1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, /*b0*/

        1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/
        0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, /*d0*/
        0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, /*e0*/
        0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0  /*f0*/
};

void codegen_debug() {
        if (output) {
                pclog("At %04x(%08x):%04x  %04x(%08x):%04x  es=%08x EAX=%08x BX=%04x ECX=%08x BP=%04x EDX=%08x EDI=%08x\n", CS,
                      cs, cpu_state.pc, SS, ss, ESP, es, EAX, BX, ECX, BP, EDX, EDI);
        }
}

static x86seg *codegen_generate_ea_16_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc) {
        if (!cpu_mod && cpu_rm == 6) {
                addbyte(0xC7); /*MOVL $0,(ssegs)*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(eaaddr));
                addlong((fetchdat >> 8) & 0xffff);
                (*op_pc) += 2;
        } else {
                int base_reg = 0, index_reg = 0;

                switch (cpu_rm) {
                case 0:
                case 1:
                case 7:
                        base_reg = LOAD_REG_W(REG_BX);
                        break;
                case 2:
                case 3:
                case 6:
                        base_reg = LOAD_REG_W(REG_BP);
                        break;
                case 4:
                        base_reg = LOAD_REG_W(REG_SI);
                        break;
                case 5:
                        base_reg = LOAD_REG_W(REG_DI);
                        break;
                }
                if (!(cpu_rm & 4)) {
                        if (cpu_rm & 1)
                                index_reg = LOAD_REG_W(REG_DI);
                        else
                                index_reg = LOAD_REG_W(REG_SI);
                }
                base_reg &= 7;
                index_reg &= 7;

                switch (cpu_mod) {
                case 0:
                        if (cpu_rm & 4) {
                                addbyte(0x41); /*MOVZX EAX, base_reg*/
                                addbyte(0x0f);
                                addbyte(0xb7);
                                addbyte(0xc0 | base_reg);
                        } else {
                                addbyte(0x67); /*LEA EAX, base_reg+index_reg*/
                                addbyte(0x43);
                                addbyte(0x8d);
                                if (base_reg == 5) {
                                        addbyte(0x44);
                                        addbyte(base_reg | (index_reg << 3));
                                        addbyte(0);
                                } else {
                                        addbyte(0x04);
                                        addbyte(base_reg | (index_reg << 3));
                                }
                        }
                        break;
                case 1:
                        if (cpu_rm & 4) {
                                addbyte(0x67); /*LEA EAX, base_reg+imm8*/
                                addbyte(0x41);
                                addbyte(0x8d);
                                addbyte(0x40 | base_reg);
                                addbyte((fetchdat >> 8) & 0xff);
                        } else {
                                addbyte(0x67); /*LEA EAX, base_reg+index_reg+imm8*/
                                addbyte(0x43);
                                addbyte(0x8d);
                                addbyte(0x44);
                                addbyte(base_reg | (index_reg << 3));
                                addbyte((fetchdat >> 8) & 0xff);
                        }
                        (*op_pc)++;
                        break;
                case 2:
                        if (cpu_rm & 4) {
                                addbyte(0x67); /*LEA EAX, base_reg+imm8*/
                                addbyte(0x41);
                                addbyte(0x8d);
                                addbyte(0x80 | base_reg);
                                addlong((fetchdat >> 8) & 0xffff);
                        } else {
                                addbyte(0x67); /*LEA EAX, base_reg+index_reg+imm16*/
                                addbyte(0x43);
                                addbyte(0x8d);
                                addbyte(0x84);
                                addbyte(base_reg | (index_reg << 3));
                                addlong((fetchdat >> 8) & 0xffff);
                        }
                        (*op_pc) += 2;
                        break;
                }
                if (cpu_mod || !(cpu_rm & 4)) {
                        addbyte(0x25); /*ANDL $0xffff, %eax*/
                        addlong(0xffff);
                }
                addbyte(0x89); /*MOV eaaddr, EAX*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(eaaddr));

                if (mod1seg[cpu_rm] == &ss && !op_ssegs)
                        op_ea_seg = &_ss;
        }
        return op_ea_seg;
}
//#if 0
static x86seg *codegen_generate_ea_32_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc,
                                           int stack_offset) {
        uint32_t new_eaaddr;

        if (cpu_rm == 4) {
                uint8_t sib = fetchdat >> 8;
                int base_reg = -1, index_reg = -1;

                (*op_pc)++;

                if (cpu_mod || (sib & 7) != 5)
                        base_reg = LOAD_REG_L(sib & 7) & 7;

                if (((sib >> 3) & 7) != 4)
                        index_reg = LOAD_REG_L((sib >> 3) & 7) & 7;

                if (index_reg == -1) {
                        switch (cpu_mod) {
                        case 0:
                                if ((sib & 7) == 5) {
                                        new_eaaddr = fastreadl(cs + (*op_pc) + 1);
                                        addbyte(0xb8); /*MOV EAX, imm32*/
                                        addlong(new_eaaddr);
                                        (*op_pc) += 4;
                                } else {
                                        addbyte(0x44); /*MOV EAX, base_reg*/
                                        addbyte(0x89);
                                        addbyte(0xc0 | (base_reg << 3));
                                }
                                break;
                        case 1:
                                addbyte(0x67); /*LEA EAX, imm8+base_reg*/
                                addbyte(0x41);
                                addbyte(0x8d);
                                if (base_reg == 4) {
                                        addbyte(0x44);
                                        addbyte(0x24);
                                } else {
                                        addbyte(0x40 | base_reg);
                                }
                                addbyte((fetchdat >> 16) & 0xff);
                                (*op_pc)++;
                                break;
                        case 2:
                                new_eaaddr = fastreadl(cs + (*op_pc) + 1);
                                addbyte(0x67); /*LEA EAX, imm32+base_reg*/
                                addbyte(0x41);
                                addbyte(0x8d);
                                if (base_reg == 4) {
                                        addbyte(0x84);
                                        addbyte(0x24);
                                } else {
                                        addbyte(0x80 | base_reg);
                                }
                                addlong(new_eaaddr);
                                (*op_pc) += 4;
                                break;
                        }
                } else {
                        switch (cpu_mod) {
                        case 0:
                                if ((sib & 7) == 5) {
                                        new_eaaddr = fastreadl(cs + (*op_pc) + 1);
                                        if (sib >> 6) {
                                                addbyte(0x67); /*LEA EAX, imm32+index_reg*scale*/
                                                addbyte(0x42);
                                                addbyte(0x8d);
                                                addbyte(0x04);
                                                addbyte(0x05 | (sib & 0xc0) | (index_reg << 3));
                                                addlong(new_eaaddr);
                                        } else {
                                                addbyte(0x67); /*LEA EAX, imm32+index_reg*/
                                                addbyte(0x41);
                                                addbyte(0x8d);
                                                addbyte(0x80 | index_reg);
                                                addlong(new_eaaddr);
                                        }
                                        (*op_pc) += 4;
                                } else {
                                        addbyte(0x67); /*LEA EAX, base_reg+index_reg*scale*/
                                        addbyte(0x43);
                                        addbyte(0x8d);
                                        if (base_reg == 5) {
                                                addbyte(0x44);
                                                addbyte(base_reg | (index_reg << 3) | (sib & 0xc0));
                                                addbyte(0);
                                        } else {
                                                addbyte(0x04);
                                                addbyte(base_reg | (index_reg << 3) | (sib & 0xc0));
                                        }
                                }
                                break;
                        case 1:
                                addbyte(0x67); /*LEA EAX, imm8+base_reg+index_reg*scale*/
                                addbyte(0x43);
                                addbyte(0x8d);
                                addbyte(0x44);
                                addbyte(base_reg | (index_reg << 3) | (sib & 0xc0));
                                addbyte((fetchdat >> 16) & 0xff);
                                (*op_pc)++;
                                break;
                        case 2:
                                new_eaaddr = fastreadl(cs + (*op_pc) + 1);
                                addbyte(0x67); /*LEA EAX, imm32+base_reg+index_reg*scale*/
                                addbyte(0x43);
                                addbyte(0x8d);
                                addbyte(0x84);
                                addbyte(base_reg | (index_reg << 3) | (sib & 0xc0));
                                addlong(new_eaaddr);
                                (*op_pc) += 4;
                                break;
                        }
                }
                if (stack_offset && (sib & 7) == 4 && (cpu_mod || (sib & 7) != 5)) /*ESP*/
                {
                        addbyte(0x05);
                        addlong(stack_offset);
                }
                if (((sib & 7) == 4 || (cpu_mod && (sib & 7) == 5)) && !op_ssegs)
                        op_ea_seg = &_ss;

                addbyte(0x89); /*MOV eaaddr, EAX*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(eaaddr));
        } else {
                int base_reg;

                if (!cpu_mod && cpu_rm == 5) {
                        new_eaaddr = fastreadl(cs + (*op_pc) + 1);
                        addbyte(0xC7); /*MOVL $new_eaaddr,(eaaddr)*/
                        addbyte(0x45);
                        addbyte((uint8_t)cpu_state_offset(eaaddr));
                        addlong(new_eaaddr);
                        (*op_pc) += 4;
                        return op_ea_seg;
                }
                base_reg = LOAD_REG_L(cpu_rm) & 7;
                if (cpu_mod) {
                        if (cpu_rm == 5 && !op_ssegs)
                                op_ea_seg = &_ss;
                        if (cpu_mod == 1) {
                                addbyte(0x67); /*LEA EAX, base_reg+imm8*/
                                addbyte(0x41);
                                addbyte(0x8d);
                                addbyte(0x40 | base_reg);
                                addbyte((fetchdat >> 8) & 0xff);
                                (*op_pc)++;
                        } else {
                                new_eaaddr = fastreadl(cs + (*op_pc) + 1);
                                addbyte(0x67); /*LEA EAX, base_reg+imm32*/
                                addbyte(0x41);
                                addbyte(0x8d);
                                addbyte(0x80 | base_reg);
                                addlong(new_eaaddr);
                                (*op_pc) += 4;
                        }
                        addbyte(0x89); /*MOV eaaddr, EAX*/
                        addbyte(0x45);
                        addbyte((uint8_t)cpu_state_offset(eaaddr));
                } else {
                        addbyte(0x44); /*MOV eaaddr, base_reg*/
                        addbyte(0x89);
                        addbyte(0x45 | (base_reg << 3));
                        addbyte((uint8_t)cpu_state_offset(eaaddr));
                }
        }
        return op_ea_seg;
}
//#endif
void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc) {
        codeblock_t *block = &codeblock[block_current];
        uint32_t op_32 = use32;
        uint32_t op_pc = new_pc;
        OpFn *op_table = x86_dynarec_opcodes;
        RecompOpFn *recomp_op_table = recomp_opcodes;
        int opcode_shift = 0;
        int opcode_mask = 0x3ff;
        int over = 0;
        int pc_off = 0;
        int test_modrm = 1;
        int c;

        op_ea_seg = &_ds;
        op_ssegs = 0;
        op_old_pc = old_pc;

        for (c = 0; c < NR_HOST_REGS; c++)
                host_reg_mapping[c] = -1;
        for (c = 0; c < NR_HOST_XMM_REGS; c++)
                host_reg_xmm_mapping[c] = -1;

        codegen_timing_start();

        while (!over) {
                switch (opcode) {
                case 0x0f:
                        op_table = x86_dynarec_opcodes_0f;
                        recomp_op_table = recomp_opcodes_0f;
                        over = 1;
                        break;

                case 0x26: /*ES:*/
                        op_ea_seg = &_es;
                        op_ssegs = 1;
                        break;
                case 0x2e: /*CS:*/
                        op_ea_seg = &_cs;
                        op_ssegs = 1;
                        break;
                case 0x36: /*SS:*/
                        op_ea_seg = &_ss;
                        op_ssegs = 1;
                        break;
                case 0x3e: /*DS:*/
                        op_ea_seg = &_ds;
                        op_ssegs = 1;
                        break;
                case 0x64: /*FS:*/
                        op_ea_seg = &_fs;
                        op_ssegs = 1;
                        break;
                case 0x65: /*GS:*/
                        op_ea_seg = &_gs;
                        op_ssegs = 1;
                        break;

                case 0x66: /*Data size select*/
                        op_32 = ((use32 & 0x100) ^ 0x100) | (op_32 & 0x200);
                        break;
                case 0x67: /*Address size select*/
                        op_32 = ((use32 & 0x200) ^ 0x200) | (op_32 & 0x100);
                        break;

                case 0xd8:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_d8_a32 : x86_dynarec_opcodes_d8_a16;
                        recomp_op_table = recomp_opcodes_d8;
                        opcode_shift = 3;
                        opcode_mask = 0x1f;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xd9:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_d9_a32 : x86_dynarec_opcodes_d9_a16;
                        recomp_op_table = recomp_opcodes_d9;
                        opcode_mask = 0xff;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xda:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_da_a32 : x86_dynarec_opcodes_da_a16;
                        recomp_op_table = recomp_opcodes_da;
                        opcode_mask = 0xff;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xdb:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_db_a32 : x86_dynarec_opcodes_db_a16;
                        recomp_op_table = recomp_opcodes_db;
                        opcode_mask = 0xff;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xdc:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_dc_a32 : x86_dynarec_opcodes_dc_a16;
                        recomp_op_table = recomp_opcodes_dc;
                        opcode_shift = 3;
                        opcode_mask = 0x1f;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xdd:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_dd_a32 : x86_dynarec_opcodes_dd_a16;
                        recomp_op_table = recomp_opcodes_dd;
                        opcode_mask = 0xff;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xde:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_de_a32 : x86_dynarec_opcodes_de_a16;
                        recomp_op_table = recomp_opcodes_de;
                        opcode_mask = 0xff;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;
                case 0xdf:
                        op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_df_a32 : x86_dynarec_opcodes_df_a16;
                        recomp_op_table = recomp_opcodes_df;
                        opcode_mask = 0xff;
                        over = 1;
                        pc_off = -1;
                        test_modrm = 0;
                        block->flags |= CODEBLOCK_HAS_FPU;
                        break;

                case 0xf0: /*LOCK*/
                        break;

                case 0xf2: /*REPNE*/
                        op_table = x86_dynarec_opcodes_REPNE;
                        recomp_op_table = recomp_opcodes_REPNE;
                        break;
                case 0xf3: /*REPE*/
                        op_table = x86_dynarec_opcodes_REPE;
                        recomp_op_table = recomp_opcodes_REPE;
                        break;

                default:
                        goto generate_call;
                }
                fetchdat = fastreadl(cs + op_pc);
                codegen_timing_prefix(opcode, fetchdat);
                if (cpu_state.abrt)
                        return;
                opcode = fetchdat & 0xff;
                if (!pc_off)
                        fetchdat >>= 8;
                op_pc++;
        }

generate_call:
        codegen_timing_opcode(opcode, fetchdat, op_32);

        if ((op_table == x86_dynarec_opcodes && ((opcode & 0xf0) == 0x70 || (opcode & 0xfc) == 0xe0 || opcode == 0xc2 ||
                                                 (opcode & 0xfe) == 0xca || (opcode & 0xfc) == 0xcc || (opcode & 0xfc) == 0xe8 ||
                                                 (opcode == 0xff && ((fetchdat & 0x38) >= 0x10 && (fetchdat & 0x38) < 0x30)))) ||
            (op_table == x86_dynarec_opcodes_0f && ((opcode & 0xf0) == 0x80))) {
                /*Opcode is likely to cause block to exit, update cycle count*/
                if (codegen_block_cycles) {
                        addbyte(0x81); /*SUB $codegen_block_cycles, cyclcs*/
                        addbyte(0x6d);
                        addbyte((uint8_t)cpu_state_offset(_cycles));
                        addlong((uint32_t)codegen_block_cycles);
                        codegen_block_cycles = 0;
                }
                if (codegen_block_ins) {
                        addbyte(0x81); /*ADD $codegen_block_ins,ins*/
                        addbyte(0x45);
                        addbyte((uint8_t)cpu_state_offset(cpu_recomp_ins));
                        addlong(codegen_block_ins);
                        codegen_block_ins = 0;
                }
#if 0
		if (codegen_block_full_ins)
		{
			addbyte(0x81); /*ADD $codegen_block_ins,ins*/
			addbyte(0x04);
			addbyte(0x25);
			addlong((uint32_t)&cpu_recomp_full_ins);
			addlong(codegen_block_full_ins);
			codegen_block_full_ins = 0;
		}
#endif
        }

        if ((op_table == x86_dynarec_opcodes_REPNE || op_table == x86_dynarec_opcodes_REPE) && !op_table[opcode | op_32]) {
                op_table = x86_dynarec_opcodes;
                recomp_op_table = recomp_opcodes;
        }

        if (recomp_op_table && recomp_op_table[(opcode | op_32) & 0x1ff]) {
                uint32_t new_pc = recomp_op_table[(opcode | op_32) & 0x1ff](opcode, fetchdat, op_32, op_pc, block);
                if (new_pc) {
                        if (new_pc != -1)
                                STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, new_pc);

                        codegen_block_ins++;
                        block->ins++;
                        codegen_block_full_ins++;
                        codegen_endpc = (cs + cpu_state.pc) + 8;

                        return;
                }
        }

        op = op_table[((opcode >> opcode_shift) | op_32) & opcode_mask];
        //        if (output)
        //                pclog("Generate call at %08X %02X %08X %02X  %08X %08X %08X %08X %08X  %02X %02X %02X %02X\n",
        //                &codeblock[block_current][block_pos], opcode, new_pc, ram[old_pc], EAX, EBX, ECX, EDX, ESI,
        //                ram[0x7bd2+6],ram[0x7bd2+7],ram[0x7bd2+8],ram[0x7bd2+9]);
        if (op_ssegs != last_ssegs) {
                last_ssegs = op_ssegs;
                addbyte(0xC6); /*MOVB $0,(ssegs)*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(ssegs));
                addbyte(op_ssegs);
        }
        //#if 0
        if ((!test_modrm || (op_table == x86_dynarec_opcodes && opcode_modrm[opcode]) ||
             (op_table == x86_dynarec_opcodes_0f && opcode_0f_modrm[opcode])) /* && !(op_32 & 0x200)*/) {
                int stack_offset = 0;

                if (op_table == x86_dynarec_opcodes && opcode == 0x8f) /*POP*/
                        stack_offset = (op_32 & 0x100) ? 4 : 2;

                cpu_mod = (fetchdat >> 6) & 3;
                cpu_reg = (fetchdat >> 3) & 7;
                cpu_rm = fetchdat & 7;

                addbyte(0xC7); /*MOVL $rm | mod | reg,(rm_mod_reg_data)*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(rm_data.rm_mod_reg_data));
                addlong(cpu_rm | (cpu_mod << 8) | (cpu_reg << 16));

                op_pc += pc_off;
                if (cpu_mod != 3 && !(op_32 & 0x200))
                        op_ea_seg = codegen_generate_ea_16_long(op_ea_seg, fetchdat, op_ssegs, &op_pc);
                if (cpu_mod != 3 && (op_32 & 0x200))
                        op_ea_seg = codegen_generate_ea_32_long(op_ea_seg, fetchdat, op_ssegs, &op_pc, stack_offset);
                op_pc -= pc_off;
        }
        //#endif
        if (op_ea_seg != last_ea_seg) {
                //                last_ea_seg = op_ea_seg;
                addbyte(0xC7); /*MOVL $&_ds,(ea_seg)*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(ea_seg));
                addlong((uint32_t)(uintptr_t)op_ea_seg);
        }

        addbyte(0xC7); /*MOVL [pc],new_pc*/
        addbyte(0x45);
        addbyte((uint8_t)cpu_state_offset(pc));
        addlong(op_pc + pc_off);
        addbyte(0xC7); /*MOVL $old_pc,(oldpc)*/
        addbyte(0x45);
        addbyte((uint8_t)cpu_state_offset(oldpc));
        addlong(old_pc);
        if (op_32 != last_op32) {
                last_op32 = op_32;
                addbyte(0xC7); /*MOVL $use32,(op32)*/
                addbyte(0x45);
                addbyte((uint8_t)cpu_state_offset(op32));
                addlong(op_32);
        }

        load_param_1_32(block, fetchdat);
        call(block, (uintptr_t)op);

        codegen_block_ins++;

        block->ins++;

        addbyte(0x85); /*OR %eax, %eax*/
        addbyte(0xc0);
        addbyte(0x0F);
        addbyte(0x85); /*JNZ 0*/
        addlong((uint32_t)(uintptr_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(uintptr_t)(&block->data[block_pos + 4]));

        //	call(block, codegen_debug);

        codegen_endpc = (cs + cpu_state.pc) + 8;
}

#endif
